Optimize floating-point minimum/maximum operations.

Combine a floating-point compare + select into min/max operations on x86
using SSE2 instructions, where equivalent.

Only handles the most common cases for now (but for both scalars and
vectors):

  x > y ? x : y  ->  max
  x < y ? x : y  ->  min

BUG=swiftshader:19
Change-Id: Ic87bfa90cefd8014af5624d85a2ecef1c891e328
Reviewed-on: https://chromium-review.googlesource.com/439814
Reviewed-by: Jim Stichnoth <stichnot@chromium.org>
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h index bc17deb..c5eac33 100644 --- a/src/IceTargetLoweringX86BaseImpl.h +++ b/src/IceTargetLoweringX86BaseImpl.h 
@@ -260,10 +260,12 @@  invalidateProducersOnStore(&Instr);  // Check whether Instr is a valid producer.  Variable *Var = Instr.getDest(); - if (Var // only consider instructions with an actual dest var - && Var->getType() == IceType_i1 // only bool-type dest vars - && getProducerKind(&Instr) != PK_None) { // white-listed instructions - Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr); + if (Var) { // only consider instructions with an actual dest var + if (isBooleanType(Var->getType())) { // only bool-type dest vars + if (getProducerKind(&Instr) != PK_None) { // white-listed instructions + Producers[Var->getIndex()] = BoolFoldingEntry<Traits>(&Instr); + } + }  }  // Check each src variable against the map.  FOREACH_VAR_IN_INST(Var, Instr) { @@ -3335,9 +3337,6 @@  Operand *Src1 = Fcmp->getSrc(1);  Variable *Dest = Fcmp->getDest();   - if (isVectorType(Dest->getType())) - llvm::report_fatal_error("Vector compare/branch cannot be folded"); -  if (Consumer != nullptr) {  if (auto *Select = llvm::dyn_cast<InstSelect>(Consumer)) {  if (lowerOptimizeFcmpSelect(Fcmp, Select)) @@ -3345,6 +3344,13 @@  }  }   + if (isVectorType(Dest->getType())) { + lowerFcmp(Fcmp); + if (Consumer != nullptr) + lowerSelectVector(llvm::cast<InstSelect>(Consumer)); + return; + } +  // Lowering a = fcmp cond, b, c  // ucomiss b, c /* only if C1 != Br_None */  // /* but swap b,c order if SwapOperands==true */ @@ -3509,8 +3515,12 @@  Operand *Src1 = legalize(Icmp->getSrc(1));  Variable *Dest = Icmp->getDest();   - if (isVectorType(Dest->getType())) - llvm::report_fatal_error("Vector compare/branch cannot be folded"); + if (isVectorType(Dest->getType())) { + lowerIcmp(Icmp); + if (Consumer != nullptr) + lowerSelectVector(llvm::cast<InstSelect>(Consumer)); + return; + }    if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {  lowerIcmp64(Icmp, Consumer); @@ -6616,11 +6626,6 @@  void TargetX86Base<TraitsType>::lowerSelect(const InstSelect *Select) {  Variable *Dest = 
Select->getDest();   - if (isVectorType(Dest->getType())) { - lowerSelectVector(Select); - return; - } -  Operand *Condition = Select->getCondition();  // Handle folding opportunities.  if (const Inst *Producer = FoldingInfo.getProducerFor(Condition)) { @@ -6640,6 +6645,11 @@  }  }   + if (isVectorType(Dest->getType())) { + lowerSelectVector(Select); + return; + } +  Operand *CmpResult = legalize(Condition, Legal_Reg | Legal_Mem);  Operand *Zero = Ctx->getConstantZero(IceType_i32);  _cmp(CmpResult, Zero); @@ -6746,24 +6756,47 @@  Operand *CmpSrc1 = Fcmp->getSrc(1);  Operand *SelectSrcT = Select->getTrueOperand();  Operand *SelectSrcF = Select->getFalseOperand(); + Variable *SelectDest = Select->getDest();   - if (CmpSrc0->getType() != SelectSrcT->getType()) + // TODO(capn): also handle swapped compare/select operand order. + if (CmpSrc0 != SelectSrcT || CmpSrc1 != SelectSrcF)  return false;   - // TODO(sehr, stichnot): fcmp/select patterns (e,g., minsd/maxss) go here. + // TODO(sehr, stichnot): fcmp/select patterns (e.g., minsd/maxss) go here.  
InstFcmp::FCond Condition = Fcmp->getCondition();  switch (Condition) {  default:  return false;  case InstFcmp::True: - case InstFcmp::False: - case InstFcmp::Ogt: - case InstFcmp::Olt: - (void)CmpSrc0; - (void)CmpSrc1; - (void)SelectSrcT; - (void)SelectSrcF;  break; + case InstFcmp::False: + break; + case InstFcmp::Ogt: { + Variable *T = makeReg(SelectDest->getType()); + if (isScalarFloatingType(SelectSrcT->getType())) { + _mov(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem)); + _maxss(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem)); + _mov(SelectDest, T); + } else { + _movp(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem)); + _maxps(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem)); + _movp(SelectDest, T); + } + return true; + } break; + case InstFcmp::Olt: { + Variable *T = makeReg(SelectSrcT->getType()); + if (isScalarFloatingType(SelectSrcT->getType())) { + _mov(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem)); + _minss(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem)); + _mov(SelectDest, T); + } else { + _movp(T, legalize(SelectSrcT, Legal_Reg | Legal_Mem)); + _minps(T, legalize(SelectSrcF, Legal_Reg | Legal_Mem)); + _movp(SelectDest, T); + } + return true; + } break;  }  return false;  } @@ -6794,6 +6827,7 @@  Variable *T = makeReg(SrcTy);  Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);  Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); +  if (InstructionSet >= Traits::SSE4_1) {  // TODO(wala): If the condition operand is a constant, use blendps or  // pblendw. 
diff --git a/tests_lit/llvm2ice_tests/fp.cmp.ll b/tests_lit/llvm2ice_tests/fp.cmp.ll index c406f58..ce646f2 100644 --- a/tests_lit/llvm2ice_tests/fp.cmp.ll +++ b/tests_lit/llvm2ice_tests/fp.cmp.ll 
@@ -1000,9 +1000,7 @@  }  ; CHECK-LABEL: selectFloatVarVar  ; CHECK: movss -; CHECK: ucomiss -; CHECK: ja -; CHECK: movss +; CHECK: minss  ; ARM32-LABEL: selectFloatVarVar  ; ARM32: vcmp.f32  ; ARM32-OM1: vmovne.f32 s{{[0-9]+}} @@ -1019,9 +1017,7 @@  }  ; CHECK-LABEL: selectDoubleVarVar  ; CHECK: movsd -; CHECK: ucomisd -; CHECK: ja -; CHECK: movsd +; CHECK: minsd  ; ARM32-LABEL: selectDoubleVarVar  ; ARM32: vcmp.f64  ; ARM32-OM1: vmovne.f64 d{{[0-9]+}}